{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Creating summary statistics with *tableone*"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This document demonstrates how the tableone package can be used to create a table of summary statistics for a patient cohort selected from MIMIC-III."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import the libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Import libraries\n",
    "from tableone import TableOne\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import psycopg2\n",
    "import getpass\n",
    "%matplotlib inline\n",
    "plt.style.use('ggplot') "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create the database connection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Create a database connection\n",
    "user = 'postgres'\n",
    "host = 'localhost'\n",
    "dbname = 'mimic'\n",
    "schema = 'mimiciii_demo'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Password:········\n"
     ]
    }
   ],
   "source": [
    "# Connect to the database\n",
    "con = psycopg2.connect(dbname=dbname, user=user, host=host, \n",
    "                       password=getpass.getpass(prompt='Password:'.format(user)))\n",
    "cur = con.cursor()\n",
    "cur.execute('SET search_path to {}'.format(schema))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Select data on the first hospital stay"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Run query and assign the results to a Pandas DataFrame\n",
    "# Get first admission for each patient\n",
    "query = \\\n",
    "\"\"\"\n",
    "WITH admit AS (\n",
    "    SELECT p.gender, \n",
    "        ROUND( (CAST(EXTRACT(epoch FROM a.dischtime - a.admittime)/(60*60*24) AS numeric)), 4) AS los_hospital,\n",
    "        ROUND( (CAST(EXTRACT(epoch FROM a.admittime - p.dob)/(60*60*24*365.242) AS numeric)), 4) AS age, \n",
    "        DENSE_RANK() OVER (PARTITION BY a.subject_id ORDER BY a.admittime) AS admission_seq,\n",
    "        a.ethnicity, a.admission_type, a.insurance, a.religion, a.marital_status, a.hospital_expire_flag\n",
    "    FROM patients p\n",
    "    LEFT JOIN admissions a\n",
    "    ON p.subject_id = a.subject_id)\n",
    "SELECT *\n",
    "FROM admit\n",
    "WHERE admission_seq = 1;\n",
    "\"\"\"\n",
    "\n",
    "data = pd.read_sql_query(query,con)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Display the first few rows of the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create Table 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"3\" halign=\"left\">Grouped by admission_type</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>ELECTIVE</th>\n",
       "      <th>EMERGENCY</th>\n",
       "      <th>URGENT</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable</th>\n",
       "      <th>level</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>n</th>\n",
       "      <th></th>\n",
       "      <td>8</td>\n",
       "      <td>90</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>age</th>\n",
       "      <th></th>\n",
       "      <td>74.23 (11.05)</td>\n",
       "      <td>90.01 (68.12)</td>\n",
       "      <td>75.66 (4.29)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">ethnicity</th>\n",
       "      <th>WHITE</th>\n",
       "      <td>8 (100.0)</td>\n",
       "      <td>65 (72.22)</td>\n",
       "      <td>1 (50.0)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>UNKNOWN/NOT SPECIFIED</th>\n",
       "      <td></td>\n",
       "      <td>9 (10.0)</td>\n",
       "      <td>1 (50.0)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BLACK/AFRICAN AMERICAN</th>\n",
       "      <td></td>\n",
       "      <td>6 (6.67)</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">gender</th>\n",
       "      <th>F</th>\n",
       "      <td>5 (62.5)</td>\n",
       "      <td>48 (53.33)</td>\n",
       "      <td>2 (100.0)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>M</th>\n",
       "      <td>3 (37.5)</td>\n",
       "      <td>42 (46.67)</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">hospital_expire_flag</th>\n",
       "      <th>0</th>\n",
       "      <td>8 (100.0)</td>\n",
       "      <td>58 (64.44)</td>\n",
       "      <td>1 (50.0)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td>32 (35.56)</td>\n",
       "      <td>1 (50.0)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">insurance</th>\n",
       "      <th>Medicare</th>\n",
       "      <td>5 (62.5)</td>\n",
       "      <td>70 (77.78)</td>\n",
       "      <td>1 (50.0)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Private</th>\n",
       "      <td>3 (37.5)</td>\n",
       "      <td>15 (16.67)</td>\n",
       "      <td>1 (50.0)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Medicaid</th>\n",
       "      <td></td>\n",
       "      <td>4 (4.44)</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>los_hospital</th>\n",
       "      <th></th>\n",
       "      <td>11.67 (11.79)</td>\n",
       "      <td>9.86 (14.43)</td>\n",
       "      <td>6.26 (0.81)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">marital_status</th>\n",
       "      <th>MARRIED</th>\n",
       "      <td>4 (50.0)</td>\n",
       "      <td>36 (48.0)</td>\n",
       "      <td>1 (50.0)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SINGLE</th>\n",
       "      <td>2 (25.0)</td>\n",
       "      <td>18 (24.0)</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WIDOWED</th>\n",
       "      <td>1 (12.5)</td>\n",
       "      <td>13 (17.33)</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">religion</th>\n",
       "      <th>CATHOLIC</th>\n",
       "      <td>6 (75.0)</td>\n",
       "      <td>33 (37.08)</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>UNOBTAINABLE</th>\n",
       "      <td></td>\n",
       "      <td>16 (17.98)</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NOT SPECIFIED</th>\n",
       "      <td>1 (12.5)</td>\n",
       "      <td>14 (15.73)</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                            Grouped by admission_type  \\\n",
       "                                                             ELECTIVE   \n",
       "variable             level                                              \n",
       "n                                                                   8   \n",
       "age                                                     74.23 (11.05)   \n",
       "ethnicity            WHITE                                  8 (100.0)   \n",
       "                     UNKNOWN/NOT SPECIFIED                              \n",
       "                     BLACK/AFRICAN AMERICAN                             \n",
       "gender               F                                       5 (62.5)   \n",
       "                     M                                       3 (37.5)   \n",
       "hospital_expire_flag 0                                      8 (100.0)   \n",
       "                     1                                                  \n",
       "insurance            Medicare                                5 (62.5)   \n",
       "                     Private                                 3 (37.5)   \n",
       "                     Medicaid                                           \n",
       "los_hospital                                            11.67 (11.79)   \n",
       "marital_status       MARRIED                                 4 (50.0)   \n",
       "                     SINGLE                                  2 (25.0)   \n",
       "                     WIDOWED                                 1 (12.5)   \n",
       "religion             CATHOLIC                                6 (75.0)   \n",
       "                     UNOBTAINABLE                                       \n",
       "                     NOT SPECIFIED                           1 (12.5)   \n",
       "\n",
       "                                                                          \n",
       "                                                 EMERGENCY        URGENT  \n",
       "variable             level                                                \n",
       "n                                                       90             2  \n",
       "age                                          90.01 (68.12)  75.66 (4.29)  \n",
       "ethnicity            WHITE                      65 (72.22)      1 (50.0)  \n",
       "                     UNKNOWN/NOT SPECIFIED        9 (10.0)      1 (50.0)  \n",
       "                     BLACK/AFRICAN AMERICAN       6 (6.67)                \n",
       "gender               F                          48 (53.33)     2 (100.0)  \n",
       "                     M                          42 (46.67)                \n",
       "hospital_expire_flag 0                          58 (64.44)      1 (50.0)  \n",
       "                     1                          32 (35.56)      1 (50.0)  \n",
       "insurance            Medicare                   70 (77.78)      1 (50.0)  \n",
       "                     Private                    15 (16.67)      1 (50.0)  \n",
       "                     Medicaid                     4 (4.44)                \n",
       "los_hospital                                  9.86 (14.43)   6.26 (0.81)  \n",
       "marital_status       MARRIED                     36 (48.0)      1 (50.0)  \n",
       "                     SINGLE                      18 (24.0)                \n",
       "                     WIDOWED                    13 (17.33)                \n",
       "religion             CATHOLIC                   33 (37.08)                \n",
       "                     UNOBTAINABLE               16 (17.98)                \n",
       "                     NOT SPECIFIED              14 (15.73)                "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Columns to include in the summary table\n",
    "columns = ['gender', 'los_hospital', 'age', 'ethnicity','admission_type', 'insurance', \n",
    "           'religion', 'marital_status','hospital_expire_flag']\n",
    "\n",
    "# List of categorical variables\n",
    "categorical = ['gender', 'ethnicity', 'insurance', 'religion', 'marital_status',\n",
    "           'hospital_expire_flag']\n",
    "\n",
    "# Group the data\n",
    "groupby = 'admission_type'\n",
    "\n",
    "# Display the top n number of categorical variables\n",
    "limit = 3\n",
    "\n",
    "# Compute p values\n",
    "pval = False\n",
    "\n",
    "# Display a count of null values\n",
    "isnull = False\n",
    "\n",
    "t = TableOne(data, columns=columns, categorical=categorical, \n",
    "         groupby=groupby, limit=limit, pval=pval, isnull=isnull)\n",
    "\n",
    "t.tableone"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
